نمودار سوالات ۵ و ۹ knit نشدند. برای دیدن شکل آن ها باید فایل RMD را اجرا نمایید.

Q1

در این سوال معیار فقر را درصد افراد زیر خط فقر در نظر گرفته ام. به وسیله ی آن فقیرترین کشور ها را به دست آورده و سپس درصد افراد زیر خط فقر و حقوق روزانه و امید به زندگی آن ها را به دست آوردم.

w_data = read.csv('data/WDI_csv/WDIData.csv')
w_series = read.csv('data/WDI_csv/WDISeries.csv')
w_country = read.csv('data/WDI_csv/WDICountry.csv')
pov_indicator = w_data %>% filter(Indicator.Code == 'SI.POV.NAHC')
pov_indicator$poverty_ratio = pov_indicator %>% select(starts_with('X')) %>% apply(., 1, mean, na.rm = TRUE)
poor_countries = pov_indicator %>% arrange(desc(poverty_ratio)) %>% top_n(10) 
poor_countries %>% select(Country.Name, poverty_ratio)
##             Country.Name poverty_ratio
## 1      Equatorial Guinea       76.8000
## 2               Zimbabwe       72.3000
## 3             Madagascar       71.6000
## 4                Eritrea       69.0000
## 5  Sao Tome and Principe       67.2500
## 6          Guinea-Bissau       67.0000
## 7       Congo, Dem. Rep.       66.6000
## 8                Burundi       66.0000
## 9              Swaziland       66.0000
## 10              Honduras       62.4125
life_indicator = w_data %>% filter((Country.Name %in% poor_countries$Country.Name) & Indicator.Code == 'SP.DYN.LE00.IN')
life_indicator$life_expectancy = life_indicator %>% select(starts_with("X")) %>% apply(., 1, mean, na.rm = TRUE)
life_indicator %>% select(Country.Name, life_expectancy)
##             Country.Name life_expectancy
## 1                Burundi        48.65196
## 2       Congo, Dem. Rep.        48.79504
## 3      Equatorial Guinea        47.29447
## 4                Eritrea        50.61509
## 5          Guinea-Bissau        47.95014
## 6               Honduras        62.98398
## 7             Madagascar        52.46847
## 8  Sao Tome and Principe        60.64623
## 9              Swaziland        51.99816
## 10              Zimbabwe        54.07304
salary_indicator = w_data %>% filter((Country.Name %in% poor_countries$Country.Name) & Indicator.Code == 'NY.GNP.PCAP.PP.CD')
salary_indicator$salary = salary_indicator %>% select(starts_with('X')) %>% apply(., 1, mean, na.rm = TRUE) 
salary_indicator$salary = salary_indicator$salary / 365
salary_indicator %>% arrange(desc(salary)) %>% hchart('column', hcaes(x = Country.Name, y = salary)) %>% hc_add_theme(hc_theme_monokai())

Q2

در سال های نزدیک ۱۹۹۴ میلادی امید به زندگی کشور روآندا به شدت کاهش یافته که دلیل آن وقوع نسل کشی در کشور روآندا بوده که باعث کشته شدن افراد زیادی شده است.

rwa_life_expectancy = w_data %>% filter(Indicator.Code == 'SP.DYN.LE00.IN' & Country.Code == 'RWA') %>% select(starts_with('X')) %>% t() %>% data.frame() %>% setDT(keep.rownames = TRUE)
rwa_life_expectancy = rwa_life_expectancy[1:57,]
colnames(rwa_life_expectancy)[1] = 'year'
colnames(rwa_life_expectancy)[2] = 'life_expectancy'

life_indicator = w_data %>% filter(Indicator.Code == 'SP.DYN.LE00.IN')
life_indicator = life_indicator[48:264,] %>% melt
colnames(life_indicator)[5] = 'year'
colnames(life_indicator)[6] = 'life_expectancy'

hcboxplot(x = life_indicator$life_expectancy, var = life_indicator$year, outliers = FALSE) %>% hc_chart(type = 'column') %>% hc_add_series(rwa_life_expectancy, 'line', hcaes(x = year, y = life_expectancy), name = 'Rwanda') %>% hc_xAxis(title = list(text = 'Year')) %>% hc_yAxis(title = list(text = 'Life Expectancy')) %>% hc_title(text = 'Life Expectancy per Year') %>% hc_add_theme(hc_theme_monokai())

Q3

می توان نتیجه گرفت که با زیاد شدن امید به زندگی میزان هزینه انجام شده برای سلامت بیشتر بوده است.

health_indicator = w_data %>% filter(Indicator.Code == 'SH.XPD.CHEX.GD.ZS') 
health_indicator$average_cost = health_indicator %>% select(starts_with('X')) %>% apply(., 1, mean, na.rm = TRUE)

life_indicator = w_data %>% filter(Indicator.Code == 'SP.DYN.LE00.IN')
life_indicator$life_expectancy = life_indicator %>% select(starts_with('X')) %>% apply(., 1, mean, na.rm = TRUE)


health_life = inner_join(life_indicator %>% select(Country.Name, life_expectancy), health_indicator %>% select(Country.Name, average_cost), by = 'Country.Name')

ggplot(health_life, aes(x = life_expectancy , y = average_cost)) + geom_point(color = 'red') + geom_smooth(method = lm) 

Q4

بله با توجه به نمودار زیر می توان این موضوع را فهمید.

iran_indicator = w_data %>% filter(Indicator.Code == 'NY.GNP.PCAP.PP.CD' & Country.Code == 'IRN') %>% select(5:63) %>% t()
iran_indicator = data.frame(iran_indicator)
iran_indicator = setDT(iran_indicator, keep.rownames = TRUE)
iran_indicator = iran_indicator %>% filter(between(row_number(), 31, 59))
colnames(iran_indicator)[1] = 'year'
colnames(iran_indicator)[2] = 'purchasing_power'
hchart(iran_indicator, 'line', hcaes(x = year, y = purchasing_power)) %>% hc_add_theme(hc_theme_monokai())

Q5

در نمودار های زیر می توانید این مقایسه ها را ببینید.

finance_indicators = w_series %>% filter(Series.Code %in% c('FP.CPI.TOTL.ZG', 'NY.GDP.MKTP.KD.ZG', 'NY.GDP.MKTP.KD', 'NY.GDP.FCST.CD', 'BX.GSR.TOTL.CD', 'CM.MKT.TRAD.GD.ZS', 'FR.INR.LNDP', 'CM.MKT.LCAP.GD.ZS', 'FR.INR.DPST', 'NE.GDI.STKB.CD', 'NE.GDI.TOTL.CD', 'NE.IMP.GNFS.ZS', 'NE.TRD.GNFS.ZS', 'NV.IND.MANF.ZS', 'NV.IND.TOTL.ZS', 'NY.TAX.NIND.CD', 'NY.GSR.NFCY.CD', 'TX.VAL.MRCH.WL.CD', 'BX.KLT.DINV.WD.GD.ZS' , 'FM.LBL.BMNY.GD.ZS')) %>% select(Series.Code, Indicator.Name)

lst = list()

sapply(1:20, function(index){
  df = w_data %>% filter(Indicator.Code == finance_indicators$Series.Code[index]) %>% filter(between(row_number(), 48, 264)) %>% melt
  colnames(df)[5] = 'year'
  colnames(df)[6] = 'name'
  iran_df = w_data %>% filter(Indicator.Code == finance_indicators$Series.Code[index] & Country.Code == 'IRN') %>% select(starts_with('X')) %>% t() %>% data.frame() %>% setDT(keep.rownames = TRUE) %>% filter(between(row_number(), 1, 57))
  colnames(iran_df)[1] = 'year'
  colnames(iran_df)[2] = 'name'
 lst[[index]] = hcboxplot(x = df$name, var = df$year, outliers = FALSE) %>% hc_chart(type = 'column') %>% print(hc_add_series(name = 'Iran', iran_df, 'line', hcaes(x = year, y = name)) %>% hc_yAxis(title = list(text = finance_indicators$Indicator.Name[index])) %>% hc_xAxis(title = list(text = 'Year')) %>% hc_add_theme(hc_theme_monokai()))
})
##               [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]  
## x             List,6 List,6 List,6 List,6 List,6 List,6 List,6 List,6
## width         NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## height        NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## sizingPolicy  List,6 List,6 List,6 List,6 List,6 List,6 List,6 List,6
## dependencies  NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## elementId     NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## preRenderHook NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## jsHooks       List,0 List,0 List,0 List,0 List,0 List,0 List,0 List,0
##               [,9]   [,10]  [,11]  [,12]  [,13]  [,14]  [,15]  [,16] 
## x             List,6 List,6 List,6 List,6 List,6 List,6 List,6 List,6
## width         NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## height        NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## sizingPolicy  List,6 List,6 List,6 List,6 List,6 List,6 List,6 List,6
## dependencies  NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## elementId     NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## preRenderHook NULL   NULL   NULL   NULL   NULL   NULL   NULL   NULL  
## jsHooks       List,0 List,0 List,0 List,0 List,0 List,0 List,0 List,0
##               [,17]  [,18]  [,19]  [,20] 
## x             List,6 List,6 List,6 List,6
## width         NULL   NULL   NULL   NULL  
## height        NULL   NULL   NULL   NULL  
## sizingPolicy  List,6 List,6 List,6 List,6
## dependencies  NULL   NULL   NULL   NULL  
## elementId     NULL   NULL   NULL   NULL  
## preRenderHook NULL   NULL   NULL   NULL  
## jsHooks       List,0 List,0 List,0 List,0

Q6

با توجه به نتایج به دست آمده ایران در دسته اول قرار دارد.

finance_indicators_data = w_data %>% filter(Indicator.Code %in% finance_indicators$Series.Code) %>% filter(between(row_number(), 941, 5280)) %>% melt
colnames(finance_indicators_data)[5] = 'year'
colnames(finance_indicators_data)[6] = 'val'
finance_indicators_data = finance_indicators_data %>% select(Country.Name, Indicator.Name, year, val) %>% reshape(., timevar = 'Indicator.Name', direction = 'wide', idvar = c('Country.Name', 'year')) %>% group_by(Country.Name) %>% summarise_all(funs(mean(., na.rm = TRUE)))

finance_indicators_data[3:22] = data.frame(apply(finance_indicators_data[3:22], 2, function(index){
  index = as.numeric(as.character(index))
  index[is.na(index)] = mean(index, na.rm = TRUE)
  index
}))
Countries = finance_indicators_data$Country.Name
finance_indicators_data = finance_indicators_data[,3:22]
rownames(finance_indicators_data) = Countries
finance_indicators_data = scale(finance_indicators_data)
kmeans_res = kmeans(finance_indicators_data, centers = 3) 
kmeans_res$cluster[91]
## Iran, Islamic Rep. 
##                  2
fviz_cluster(kmeans_res, finance_indicators_data)

Q7

بله چون در راستای مولفه های PCA تشکیل شده اند.

pca = prcomp(finance_indicators_data)
fviz_pca_biplot(pca, habillage = as.factor(kmeans_res$cluster))

Q8

iran_economy = w_data %>% filter(Indicator.Code == 'NY.GDP.MKTP.KD.ZG' & Country.Code == 'IRN') %>% select(starts_with('X')) %>% t() %>% data.frame()
colnames(iran_economy)[1] = 'economic_grow'
iran_economy[is.na(iran_economy)] = mean(iran_economy$economic_grow, na.rm = TRUE)
x = sapply(2:15, function(year){
  regression = data.frame()
  for (i in 1:nrow(iran_economy) - year)regression = rbind(regression, iran_economy$economic_grow[i:i + year])
  cat(sprintf('%d years used for regression\n', year))
  print(mean(summary(lm(regression[,ncol(regression)] ~ ., data = regression))$residuals ^ 2))
  return(0)
})
## 2 years used for regression
## [1] 6.730351e-29
## 3 years used for regression
## [1] 6.730351e-29
## 4 years used for regression
## [1] 6.730351e-29
## 5 years used for regression
## [1] 6.730351e-29
## 6 years used for regression
## [1] 6.730351e-29
## 7 years used for regression
## [1] 6.730351e-29
## 8 years used for regression
## [1] 6.730351e-29
## 9 years used for regression
## [1] 6.730351e-29
## 10 years used for regression
## [1] 6.730351e-29
## 11 years used for regression
## [1] 6.730351e-29
## 12 years used for regression
## [1] 6.730351e-29
## 13 years used for regression
## [1] 6.730351e-29
## 14 years used for regression
## [1] 6.730351e-29
## 15 years used for regression
## [1] 6.730351e-29

Q9

problem567_generic = function(indicators_code){
  indicators = w_series %>% filter(Series.Code %in% indicators_code) %>% select(Series.Code, Indicator.Name)
  sapply(1:20, function(index){
    df = w_data %>% filter(Indicator.Code == indicators$Series.Code[index]) %>% filter(between(row_number(), 48, 264)) %>% melt
  colnames(df)[5] = 'year'
  colnames(df)[6] = 'name'
  iran_df = w_data %>% filter(Indicator.Code == indicators$Series.Code[index] & Country.Code == 'IRN') %>% select(starts_with('X')) %>% t() %>% data.frame() %>% setDT(keep.rownames = TRUE) %>% filter(between(row_number(), 1, 57))
  colnames(iran_df)[1] = 'year'
  colnames(iran_df)[2] = 'name'
  print(hcboxplot(x = df$name, var = df$year, outliers = FALSE) %>% hc_chart(type = 'column') %>% hc_add_series(name = 'Iran', iran_df, 'line', hcaes(x = year, y = name)) %>% hc_yAxis(title = list(text = indicators$Indicator.Name[index])) %>% hc_xAxis(title = list(text = 'Year')) %>% hc_add_theme(hc_theme_monokai()))
})
  indicators_data = w_data %>% filter(Indicator.Code %in% indicators$Series.Code) %>% filter(between(row_number(), 941, 5280)) %>% melt
  colnames(indicators_data)[5] = 'year'
  colnames(indicators_data)[6] = 'val'
  indicators_data = indicators_data %>% select(Country.Name, Indicator.Name, year, val) %>% reshape(., timevar = 'Indicator.Name', direction = 'wide', idvar = c('Country.Name', 'year')) %>% group_by(Country.Name) %>% summarise_all(funs(mean(., na.rm = TRUE)))

  indicators_data[3:22] = data.frame(apply(indicators_data[3:22], 2, function(index){
    index = as.numeric(as.character(index))
    index[is.na(index)] = mean(index, na.rm = TRUE)
    index
  }))
  Countries = indicators_data$Country.Name
  indicators_data = indicators_data[,3:22]
  rownames(indicators_data) = Countries
  indicators_data = scale(indicators_data)
  kmeans_res = kmeans(indicators_data, centers = 3) 
  print(kmeans_res$cluster[91])
  print(fviz_cluster(kmeans_res, indicators_data))
  pca = prcomp(finance_indicators_data)
  print(fviz_pca_biplot(pca, habillage = as.factor(kmeans_res$cluster)))
}
health = problem567_generic(c('SH.DYN.NMRT', 'SH.DTH.MORT', 'SP.DYN.AMRT.FE', 'SP.DYN.AMRT.MA', 'SP.DYN.LE00.FE.IN', 'SP.DYN.LE00.MA.IN','SN.ITK.DFCT', 'SP.DYN.TO65.FE.ZS', 'SP.DYN.TO65.MA.ZS', 'SH.MMR.RISK.ZS', 'SP.POP.BRTH.MF', 'SP.POP.80UP.MA.5Y', 'SP.POP.80UP.FE.5Y', 'SN.ITK.DEFC.ZS',  'SH.ANM.NPRG.ZS','SP.POP.0014.TO', 'SP.POP.GROW', 'SP.POP.DPND', 'SH.DTH.NMRT', 'SH.MED.NUMW.P3'))
## Iran, Islamic Rep. 
##                  2

edu = problem567_generic(c('SE.XPD.TOTL.GD.ZS', 'SE.XPD.TERT.PC.ZS', 'SE.XPD.PRIM.ZS', 'SE.XPD.MTOT.ZS', 'SE.XPD.CTOT.ZS', 'SE.TER.TCHR.FE.ZS', 'SE.TER.ENRR', 'SE.TER.ENRL.TC.ZS', 'SE.SEC.TCHR', 'SE.SEC.ENRL', 'SE.PRM.UNER.MA.ZS', 'SE.PRM.TENR', 'SE.ADT.1524.LT.ZS', 'SE.COM.DURS', 'SE.PRE.DURS', 'SE.PRM.UNER.FE.ZS', 'SE.PRM.AGES', 'SE.PRM.ENRR.FE', 'SE.PRM.ENRR.MA', 'SE.PRM.GINT.ZS'))
## Iran, Islamic Rep. 
##                  3

Q10

indicators_code <- c('SE.XPD.TOTL.GD.ZS', 'SE.XPD.TERT.PC.ZS', 'SE.XPD.PRIM.ZS', 'SE.XPD.MTOT.ZS', 'SE.XPD.CTOT.ZS', 'SE.TER.TCHR.FE.ZS', 'SE.TER.ENRR', 'SE.TER.ENRL.TC.ZS', 'SE.SEC.TCHR', 'SE.SEC.ENRL', 'SE.PRM.UNER.MA.ZS', 'SE.PRM.TENR', 'SE.ADT.1524.LT.ZS', 'SE.COM.DURS', 'SE.PRE.DURS', 'SE.PRM.UNER.FE.ZS', 'SE.PRM.AGES', 'SE.PRM.ENRR.FE', 'SE.PRM.ENRR.MA', 'SE.PRM.GINT.ZS', 'SH.DYN.NMRT', 'SH.DTH.MORT', 'SP.DYN.AMRT.FE', 'SP.DYN.AMRT.MA', 'SP.DYN.LE00.FE.IN', 'SP.DYN.LE00.MA.IN','SN.ITK.DFCT', 'SP.DYN.TO65.FE.ZS', 'SP.DYN.TO65.MA.ZS', 'SH.MMR.RISK.ZS', 'SP.POP.BRTH.MF', 'SP.POP.80UP.MA.5Y', 'SP.POP.80UP.FE.5Y', 'SN.ITK.DEFC.ZS',  'SH.ANM.NPRG.ZS','SP.POP.0014.TO', 'SP.POP.GROW', 'SP.POP.DPND', 'SH.DTH.NMRT', 'SH.MED.NUMW.P3', 'FP.CPI.TOTL.ZG', 'NY.GDP.MKTP.KD.ZG', 'NY.GDP.MKTP.KD', 'NY.GDP.FCST.CD', 'BX.GSR.TOTL.CD', 'CM.MKT.TRAD.GD.ZS', 'FR.INR.LNDP', 'CM.MKT.LCAP.GD.ZS', 'FR.INR.DPST', 'NE.GDI.STKB.CD', 'NE.GDI.TOTL.CD', 'NE.IMP.GNFS.ZS', 'NE.TRD.GNFS.ZS', 'NV.IND.MANF.ZS', 'NV.IND.TOTL.ZS', 'NY.TAX.NIND.CD', 'NY.GSR.NFCY.CD', 'TX.VAL.MRCH.WL.CD', 'BX.KLT.DINV.WD.GD.ZS' , 'FM.LBL.BMNY.GD.ZS')

indicators = w_data %>% filter(Indicator.Code %in% indicators_code)
indicators = indicators[2821:15840,5:63]

indicators = data.frame(apply(indicators, 2, function(index){
    index = as.numeric(as.character(index))
    index[is.na(index)] = mean(index, na.rm = TRUE)
    index
  }))

distances = stats::dist(indicators,method = 'euclidean')
cluster_plot = hclust(distances, method = 'complete')
plot(cluster_plot, method = 'compelete')

Q11

در نمودار زیر امید به زندگی مردم ایران نسبت به کل دنیا نشان داده شده است. با توجه به نمودار می توان فهمید که در دوران جنگ تحمیلی امید به زندگی مردم ایران بسیار کاهش یافته است.

iran_life_expectancy = w_data %>% filter(Indicator.Code == 'SP.DYN.LE00.IN' & Country.Code == 'IRN') %>% select(starts_with('X')) %>% t() %>% data.frame() %>% setDT(keep.rownames = TRUE)
iran_life_expectancy = iran_life_expectancy[1:57,]
colnames(iran_life_expectancy)[1] = 'year'
colnames(iran_life_expectancy)[2] = 'life_expectancy'

life_indicator = w_data %>% filter(Indicator.Code == 'SP.DYN.LE00.IN')
life_indicator = life_indicator[48:264,] %>% melt
colnames(life_indicator)[5] = 'year'
colnames(life_indicator)[6] = 'life_expectancy'

hcboxplot(x = life_indicator$life_expectancy, var = life_indicator$year, outliers = FALSE) %>% hc_chart(type = 'column') %>% hc_add_series(iran_life_expectancy, 'line', hcaes(x = year, y = life_expectancy), name = 'IRAN') %>% hc_xAxis(title = list(text = 'Year')) %>% hc_yAxis(title = list(text = 'Life Expectancy')) %>% hc_title(text = 'Life Expectancy per Year') %>% hc_add_theme(hc_theme_monokai())

در این نمودار هزینه های نظامی ایران به همراه میانگین جهانی آن رسم شده است. با توجه به شکل می توان فهمید که در دوران جنگ تحمیلی این مقادیر بسیار افزایش یافته است.

iran_military_expenditure = w_data %>% filter(Indicator.Code == 'MS.MIL.XPND.GD.ZS' & Country.Code == 'IRN') %>% select(starts_with('X')) %>% t() %>% data.frame() %>% setDT(keep.rownames = TRUE)
iran_military_expenditure = iran_military_expenditure[1:57,]
colnames(iran_military_expenditure)[1] = 'year'
colnames(iran_military_expenditure)[2] = 'military_expenditure'

military_indicator = w_data %>% filter(Indicator.Code == 'MS.MIL.XPND.GD.ZS')
military_indicator = military_indicator[48:264,] %>% melt
colnames(military_indicator)[5] = 'year'
colnames(military_indicator)[6] = 'military_expenditure'

hcboxplot(x = military_indicator$military_expenditure, var = military_indicator$year, outliers = FALSE) %>% hc_chart(type = 'column') %>% hc_add_series(iran_military_expenditure, 'line', hcaes(x = year, y = military_expenditure), name = 'IRAN') %>% hc_xAxis(title = list(text = 'Year')) %>% hc_yAxis(title = list(text = 'Military expenditure')) %>% hc_title(text = 'Military expenditure per Year') %>% hc_add_theme(hc_theme_monokai())

در این قسمت به پیدا کردن تعداد قتل های عمدی در هر ۱۰۰۰۰۰ نفر از جمعیت پرداخته ایم. بیشترین تعداد قتل ها متعلق به کشورهای آمریکای لاتین و جنوبی است که قاچاق مواد مخدر در آن ها رایج است.

homicide_indicator = w_data %>% filter(Indicator.Code == 'VC.IHR.PSRC.P5')
homicide_indicator$homicide = homicide_indicator %>% select(starts_with('X')) %>% apply(., 1, mean, na.rm = TRUE)
homicide_indicator = homicide_indicator %>% arrange(desc(homicide)) %>% top_n(10) 
homicide_indicator %>% select(Country.Name, homicide)
##             Country.Name homicide
## 1            El Salvador 68.44866
## 2               Honduras 63.67554
## 3               Colombia 47.34863
## 4                Jamaica 44.54665
## 5                   Iraq 43.36667
## 6           South Africa 42.37494
## 7                Lesotho 39.55000
## 8          Venezuela, RB 39.25473
## 9  Virgin Islands (U.S.) 35.62500
## 10             Guatemala 34.88277